####################################
#Diff-in-diff with matching and dual-distance cutoffs - function 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

dd_with_dd <- function(my_data, #data frame 
                       treatment_basis_name, #character string 
                       control_basis_names, #character vector 
                       outcome_time1_name, #character string 
                       outcome_time2_name, #character string 
                       matching_variables, #character vector
                       treatment_basis_treated_max, #numeric 
                       treatment_basis_control_min, #numeric 
                       control_basis_control_max, #numeric 
                       control_basis_treated_min, #numeric 
                       boostrap_SDs=F, 
                       bootstrap_numb=NULL, 
                       conf_level=0.95, 
                       match_quietly=T, 
                       start_browser=F, 
                       my_method = "nearest", 
                       my_distance="mahalanobis", 
                       my_discard="none") #logical 
{#begin function brackets 
  require(MatchIt, quietly=T)
  #consider using the boot package 
  
  if(start_browser==T){browser()}
  
  #get treatment basis into memory 
  treatment_basis <- eval(parse(text=paste("my_data$", treatment_basis_name, "")))
  
  #get control basis into memory 
  control_basis_matrix <- matrix(, nrow=nrow(my_data), ncol=length(control_basis_names))
  counter <- 0; for(i in control_basis_names)
  { 
    counter <- counter + 1 
    control_basis_matrix[,counter] <- eval(parse(text=paste("my_data$", i, "")))
  }
  control_basis <- apply(X=control_basis_matrix, MARGIN=1, FUN=min)
  
  #determine treated_indicator 
  my_data <- cbind(my_data, NA) 
  colnames(my_data)[ncol(my_data)] <- "treated_indicator"
  my_data$treated_indicator[treatment_basis <= treatment_basis_treated_max & 
                              control_basis >= control_basis_treated_min] <- 1
  my_data$treated_indicator[control_basis <= control_basis_control_max & 
                              treatment_basis >= treatment_basis_control_min] <- 0 
  summary(my_data$treated_indicator)
  
  #delete variable with is.na(treated_indicator)=T
  my_data <- my_data[is.na(my_data$treated_indicator)==F,]
  remove_columns <- colnames(my_data) %in% c(matching_variables, outcome_time1_name, outcome_time2_name, "treated_indicator")
  my_data <- my_data[,remove_columns]
  
  #create matching formula 
  matching_formula <- paste("treated_indicator ~", paste(matching_variables, collapse="+"))
  
  #create estimation_function 
  estimation_function <- function(match_input=my_data, bootstrap)
  { 
    #create matched dataset
    matched_results <- matchit(as.formula(matching_formula), data = match_input, 
                               method = my_method, 
                               distance= my_distance, 
                               discard= my_discard)
    
    #number of matches in base result 
    if(bootstrap==F){match_numb <- matched_results$nn[,2][2]}
    
    #obtain matched dataset 
    matched_dataset <- match.data(matched_results)
    
    #initialization for the dif-in-dif 
    mean_treat_time1 <- mean(matched_dataset[matched_dataset$treated_indicator==1,which(colnames(matched_dataset)==outcome_time1_name)])
    mean_treat_time2 <- mean(matched_dataset[matched_dataset$treated_indicator==1,which(colnames(matched_dataset)==outcome_time2_name)])
    
    mean_control_time1 <- mean(matched_dataset[matched_dataset$treated_indicator==0,which(colnames(matched_dataset)==outcome_time1_name)])
    mean_control_time2 <- mean(matched_dataset[matched_dataset$treated_indicator==0,which(colnames(matched_dataset)==outcome_time2_name)])
    
    diff_in_diff_estimate <- (mean_treat_time2 - mean_treat_time1) - (mean_control_time2 - mean_control_time1)
    if(bootstrap==F){return(list(diff_in_diff_estimate=diff_in_diff_estimate, match_numb=match_numb))}
    if(bootstrap==T){return(diff_in_diff_estimate)}
  }
  
  #base result 
  base_result_list <- estimation_function(match_input=my_data, bootstrap=F)
  base_result <- base_result_list$diff_in_diff_estimate
  match_numb <- base_result_list$match_numb
  
  #with bootstrap SDs
  if(boostrap_SDs==T)
  { 
    #NOTE: the bootstrap is done AFTER removing out-of-support units. 
    
    #find the number of rows 
    my_nrows <- nrow(my_data)
    
    #generate the bootstrap indices 
    boostrap_indices <- replicate(n=bootstrap_numb, expr=sample(1:my_nrows, size=my_nrows, replace=T))
    
    #perform the bootstrap, using apply 
    bootstrap_replications <- apply(X=boostrap_indices, MARGIN=2, function(x) estimation_function(my_data[x,], bootstrap=T))
    
    #find the 95% quantiles 
    lower <- quantile(bootstrap_replications, (1-conf_level)/2, na.rm=T)
    upper <- quantile(bootstrap_replications, (1+conf_level)/2, na.rm=T)
    
    return_list <- list(base_result=base_result, 
                        lower=lower, 
                        upper=upper, 
                        match_numb=match_numb)
  } 
  
  if(boostrap_SDs==F)
  { 
    return_list <- list(base_result=base_result, 
                        lower=NULL, 
                        upper=NULL, 
                        match_numb=match_numb)
  } 
  return(return_list)
}#end function barkets 
